#This code produces Table 8 and Figures 18 and 99 in appendix C

# Create the output directories for the k = 50 tables and figures.
# Every directory is guarded with dir.exists() so that re-running the script
# does not emit "already exists" warnings, and `recursive = TRUE` creates the
# parent directory ("tabs" / "figs") when it is missing.
if(!dir.exists("tabs/k50")){dir.create("tabs/k50", recursive = TRUE)}
if(!dir.exists("figs/k50")){dir.create("figs/k50", recursive = TRUE)}

library(tidyverse)
library(arabicStemR)
library(tidytext)
library(lubridate)
library(ggplot2)
library(quanteda)
library(stm)
library(stats)
library(ggthemes)
library(ggpubr)

# NOTE: the workspace is intentionally not wiped with `rm(list = ls())`.
# Doing so silently deletes unrelated objects when this script is sourced
# from an interactive session; run the script in a fresh R session instead.
gc()

# 50 topics -------------------------------------------------------------
# load() returns the names of the objects it restored. Checking them makes the
# script fail early with a clear message if the file lacks the fitted model or
# the document metadata, instead of picking up a stale or unrelated object.
loaded_objects = load("k50.RData")
stopifnot(all(c("topic_model", "meta") %in% loaded_objects))

# Copy the `article` column to `document` (the key used in the join below)
# and drop the original column.
meta = meta %>% mutate(document = article) %>% select(- article)

# Tidy the fitted model twice: once with tidy()'s default matrix and once
# with the document-topic "gamma" matrix.
beta = tidy(topic_model)
gamma = tidy(topic_model, matrix = "gamma")

# Attach the metadata columns to every document-topic row.
gamma = left_join(gamma, meta, by = "document")
gc()

# Inspect the top 30 words of topic 22 (exploratory check used while labelling).
labelTopics(topic_model, topics = 22, n = 30)

# Hand-written topic labels and their representative terms, one row per topic.
# Row order matters: later code uses the row position as the topic number.
terms_final = tribble(
  ~topic, ~terms,
  "communication", "added, said, communicate, details, electronic",
  "Assad diplomacy", "Syria, nation, people, president, participate, Arab",
  "ISIS", "terrorism, organization, ISIS, group, armed",
  "temperature", "region, degree, south, temperature, increase",
  "public services", "citizen, work, nation, government, services, economy",
  "weather", "govenorate, agriculture, [names of regions], farmer, season",
  "foreign fighters", "news, British, Saudi, arms, discover, intelligence",
  "United States", "America, united, states, president, Washington, Obama, military",
  "media", "media, ask, truth, report, investigation, channel",
  "conspiracies and plots", "Syria, Arab, people, conspiracy, Zionist, intervention, resistance",
  "Assad", "president, Asad, mister, Bashar, Syria, people",
  "legislation", "article, law, declaration, council, number, legislation",
  "attacks", "armed, group, terrorist, car, fire, security, citizen",
  "speeches", "Syria, states, said, commitee, day",
  "victims", "aid, humanitarian, camp, refugee, provide",
  "Arab uprisings", "Egypt, Tunisia, security, protest(tazahar), people, protest(ehtejaj)",
  "election winners", "[winners' names], mister, sheikh, president, public",
  "investigations", "investigation, report, information, crime, chemical, committee",
  "finance", "dollar, public, million, billion, oil, Europe, economy",
  "Yemen", "Yemen, enemy, Saud, military, plane, bombing",
  "sports", "China, India, Korea, Brazil, Africa, world, Asia, sport",
  "accidents", "Aleppo, road, security, turn off, fire",
  "economy", "economy, industry, investment, trade, company, projects",
  "diplomacy", "president, relations, mister, visit, meeting, foreign",
  "religion", "Islam, religion, Patriarch, Muslim, Christian, Sheikh",
  "transportation", "Jordan, chemical, plane, sea, use, Libya, border, transport",
  "natural resources", "water, oil, company, electricity, gas, project, energy",
  "national unity", "nation, army, martyrs, sons, people, security",
  "Lebanon", "Lebanon, resistance, Israel, army, nation, government",
  "finance2", "Lira, public, Syrian, money, bank, company, prices",
  "bureaucracy", "council, public, project, minister/ministry, committee",
  "education", "university, education, students, teach, study",
  "terrorism", "terrorism, terror, army, armed, group, destroy",
  "Israel and Palestine", "Israel, Palestine, occupation, people, enemy, Zionist",
  "Ba'th party", "nation, party, politics, people, work, discussion, leadership, meeting",
  "Iraq", "Iraq, Baghdad, America, kill, injure",
  "Syrian opposition", "politics, discussions, crisis, nation, government, opposition, foreign",
  "Russia", "Syria, Russia(n), states, foreign, crisis, politics",
  "regional politics", "Arab, Palestine, Iraq, Kuwait, president, brotherly, Qatar",
  "regional peace", "peace, region, east, Europe, middle, union, foreign, Israel",
  "elections", "election, constitution, people, presidency, council, committee",
  "illnesses", "children, hospital, case, doctor, injured, child",
  "terrorism2", "terrorism, states, terrorist, armed, Saudi, west, group, support",
  "Iran", "Iran, region, politics, nuclear, Islam",
  "Israel and Palestine2", "Israel, Palestine, occupation, Jerusalem, city, occupied",
  "Turkey", "Turkey, Erdogan, government, party, people, justice",
  "international community", "nations, investigation, united, decision, security, committee, report",
  "culture", "culture, tourism, Syria, Arab, youth, participate, exhibit",
  "parliament", "assembly, law, project, enemies, ministers, session, people",
  "courts and trials", "court, article, adjudicate, deny, lawyer"
)
  
# Mean document-topic proportion (gamma) for each topic, largest first.
topic_avg = gamma %>% 
  group_by(topic) %>% 
  summarise(avg = mean(gamma)) %>% 
  arrange(-avg)

# The row position of each label is used as its topic number to look up the
# topic's average proportion. seq_len() is used instead of 1:nrow() so an
# empty table yields an empty index rather than c(1, 0).
terms_final = terms_final %>% 
  mutate(topic2 = seq_len(nrow(terms_final))) %>% 
  left_join(topic_avg, by = c("topic2" = "topic")) %>% 
  select(-topic2)

# Presentation names used as the column headers of the LaTeX table.
names(terms_final) = c("Topic Label", "High Probability Terms", "Expected Proportion")

# Write the table sorted by expected proportion. The sorted copy is only
# piped to the printer, so terms_final itself keeps its topic-number order.
terms_final %>% arrange(desc(`Expected Proportion`)) %>% 
  xtable::xtable(caption = "Topic labels, highest probability terms, and expected proportion for n = 50 topics",
                 label = "tab:topics") %>% 
  xtable::print.xtable(include.rownames = FALSE, size = "footnotesize",
                       file = "tabs/k50/topics.tex")

gc()

# Lookup from topic number to label. Row i of terms_final holds the label of
# topic i, so the ids are derived from the table instead of hard-coding 1:50.
topics = tibble(topic = seq_len(nrow(terms_final)),
                `Topic Label` = terms_final$`Topic Label`)

# Join on the topic number explicitly rather than relying on whichever
# columns the two tables happen to share.
gamma = gamma %>% ungroup %>% left_join(topics, by = "topic")

# Replace the numeric topic id with its label.
gamma = gamma %>% select(-topic) %>% rename(topic = `Topic Label`)

# Average gamma per topic and date. Rows whose label has two characters or
# fewer are dropped. ungroup() removes the grouping summarise() leaves behind.
gamma = gamma %>% 
  select(document, topic, date, gamma) %>% 
  filter(nchar(topic) > 2) %>% 
  group_by(topic, date) %>% 
  summarise(avg = mean(gamma)) %>% 
  ungroup()

# Wide layout: one row per date, one column per topic label.
gamma2 = gamma %>% 
  spread(topic, avg)

# Replace every numeric (topic) column by its mean within the calendar week;
# `date` and `week` are moved to the first two positions.
gamma3 = gamma2 %>% 
  mutate(week = floor_date(date, "week")) %>% 
  select(date, week, everything()) %>% 
  group_by(week) %>% 
  mutate_if(.predicate = is.numeric, .funs = mean) %>% 
  ungroup()

# Mark the weekly-mean columns (everything after date and week) with "_fit".
names(gamma3)[3:ncol(gamma3)] = paste0(names(gamma3)[3:ncol(gamma3)], "_fit")

# Put the daily values back next to their weekly means, matched on date.
gamma3 = gamma3 %>% left_join(gamma2, by = "date")

# Plot one topic's daily series together with its "_fit" (weekly mean) series
# and save the figure as a PDF.
#
# Args:
#   y1: Name of a topic column in the global data frame `gamma3`. A companion
#       column named paste0(y1, "_fit") must also exist there.
#
# Returns:
#   The ggplot object, invisibly. As a side effect the figure is written to
#   "figs/k50/<y1>.pdf" (6 x 5 inches).
plot_fun = function(y1){
  y2 = paste0(y1, "_fit")
  title = tools::toTitleCase(y1)
  
  # Copy the three series into a plain data frame with fixed column names so
  # the layers can refer to columns instead of passing vectors into aes().
  df = data.frame(date = gamma3[["date"]],
                  daily = gamma3[[y1]],
                  weekly = gamma3[[y2]])
  
  # The colour aesthetic is mapped to the two series names. y1 is a prefix of
  # y2 and therefore sorts first, so the daily series is drawn in dark grey
  # and the "_fit" series in black.
  p = ggplot(df, aes(x = date)) + 
    geom_line(aes(y = daily, colour = y1)) + 
    geom_line(aes(y = weekly, colour = y2)) + 
    # Dashed vertical reference line at 15 March 2011.
    geom_vline(xintercept = dmy("15-03-2011"), linetype = 2) + 
    scale_colour_manual(values = c("darkgrey", "black")) + 
    labs(title = title, y = expression(paste("Average  ", gamma)), x = "Year") + theme_few() + 
    theme(legend.position = "none", title = element_text(size = 20),
          axis.text = element_text(size = 14)) + 
    # Same y-range on every figure so topics are visually comparable.
    coord_cartesian(ylim = c(0, 0.25)) + 
    scale_x_date(date_breaks = "2 year", date_labels = "%y") + 
    scale_y_continuous(breaks = c(0, 0.10, 0.20))
  
  ggsave(filename = paste0("figs/k50/", y1, ".pdf"), plot = p,
         width = 6, height = 5)
  
  # Return the plot that was built; the previous `return(plot)` handed back
  # the base graphics function `plot` instead.
  invisible(p)
}



# Topic columns are every column of gamma3 except the `date` / `week` keys and
# the weekly-mean columns ending in "_fit". The pattern is anchored so that a
# topic label merely containing "fit", "date" or "week" (e.g. "candidates")
# is not dropped by accident.
plot_names = names(gamma3)[!str_detect(names(gamma3), "_fit$|^date$|^week$")]

# seq_along() gives an empty sequence when there is nothing to plot, whereas
# 1:length(x) would iterate over c(1, 0).
for(i in seq_along(plot_names)){
  plot_fun(y1 = plot_names[i])
}
